INTRODUCTION

Sierra Object using GSE123904 (Lung Cancer Data using Normal, Tumor and Metastasis Data) prepared using Sierra, Scran and Scater packages. DUTest function is used for differential alternative splicing analysis (via DEXSeq package).

Required Package Import

set.seed(9999)
gtf_gr <- rtracklayer::import("GRCh38.108_ensembl.chr_filtered.gtf")

Pre-Processed Data Import

load("rta_data.RData")

Cell Type Abbreviations

DENDRITIC=Dendritic Cells ENDOTHELIAL=Endothelial Cells EPITHELIAL=Epithelial (and Tumor) Cells FIBROBLAST=Fibroblasts IG=Ig MACROPHAGE=Macrophages MAST=Mast Cells MDSC=Myeloid Derived Suppressor Cells MICROGLIA=Microglia MONOCYTE=Monocyte NEUTROPHIL=Neutrophil NK=Natural Killer Cells NKT=Natural Killer Like T Cells PERICYTE=Pericytes PROLIFERATING=Proliferating Cells Th=T Helper Cells Tm=Memory T Cells Treg=Regulatory T Cells

UMAP and TSNE Plots of Cell Types, Data Source and Cell Labels

plotReducedDim(merged_sce , dimred = "TSNE" , colour_by = "cell_type")

plotReducedDim(merged_sce , dimred = "TSNE" , colour_by = "source")

plotReducedDim(merged_sce , dimred = "TSNE" , colour_by = "cell_label")

Number of Cells in Each Cell Type,Data Source and Cell Label

ggplot(data=as.data.frame(table(merged_sce$cell_type)), aes(x=Var1, y=Freq, fill = Var1, label = Freq)) + geom_bar(stat="identity")+theme_minimal() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + geom_text(size = 5, position = position_stack(vjust = 0.5)) + theme(legend.position = "none")  

ggplot(data=as.data.frame(table(merged_sce$source)), aes(x=Var1, y=Freq, fill = Var1, label = Freq)) + geom_bar(stat="identity")+theme_minimal() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + geom_text(size = 10, position = position_stack(vjust = 0.5))+ theme(legend.position = "none")  

ggplot(data=as.data.frame(table(merged_sce$cell_label)), aes(x=Var1, y=Freq, fill = Var1, label = Freq)) + geom_bar(stat="identity")+theme_minimal() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + geom_text(size = 3, position = position_stack(vjust = 0.5))+ theme(legend.position = "none")  

Cell Labels with more than 500 cells in it

Due to low number of cells in some cell label clusters, statistical analysis will be biased.

kable(as.data.frame(table(merged_sce$cell_label)[table(merged_sce$cell_label) > 500]) , digits = 4, "simple")
Var1 Freq
Breg_Tumor 2458
DENDRITIC_Tumor 697
ENDOTHELIAL_Tumor 776
EPITHELIAL_Met 981
EPITHELIAL_Tumor 1514
MACROPHAGE_Normal 2394
MACROPHAGE_Tumor 981
MAST_Tumor 654
MDSC_Tumor 701
MONOCYTE_Normal 727
MONOCYTE_Tumor 656
NK_Normal 1769
NKT_Met 559
NKT_Normal 1386
NKT_Tumor 1078
PERICYTE_Met 702
Th_Met 1249
Th_Normal 1024
Th_Tumor 7014
Tm_Met 2443
Tm_Normal 2169
Tm_Tumor 5103
Treg_Tumor 838
# Tumor and Metastat is Sample Comparisons (T Cells)

Comparison of Memory T Cells of Tumor and Metastatsis Samples

tm_met_tumor <- DUTest(peaks_sce, 
                    population.1 = "Tm_Met", 
                    population.2 = "Tm_Tumor",
                    exp.thresh = 0.1, 
                    feature.type = c("UTR3", "exon" , "intron"))
## [1] "4263 expressed peaks in feature types UTR3, exon, intron"
## [1] "1042 genes detected with multiple peak sites expressed"
## [1] "3001 individual peak sites to test"
## [1] "Running DEXSeq test..."
tm_met_tumor.top <- subset(tm_met_tumor, abs(Log2_fold_change) > 1)

Results Table (Memory T Cells)

kable(tm_met_tumor.top , digits = 4, "simple")
gene_name genomic_feature(s) population1_pct population2_pct pvalue padj Log2_fold_change
RPS27A:2:55232649-55232899:1 RPS27A UTR5;intron;exon 0.6246 0.6083 0 0 -1.0279
COQ10B:2:197472359-197472832:1 COQ10B intron 0.1347 0.0145 0 0 2.5804
COQ10B:2:197474702-197475278:1 COQ10B UTR3 0.0995 0.1170 0 0 -1.8355
CYBA:16:88648868-88649464:-1 CYBA intron 0.0995 0.1875 0 0 -1.6545
CYBA:16:88648920-88649322:-1 CYBA intron 0.0966 0.1820 0 0 -1.6459
HSP90AA1:14:102081942-102082536:-1 HSP90AA1 intron;exon 0.1772 0.0500 0 0 1.6041
HSPD1:2:197512969-197513353:-1 HSPD1 intron 0.1273 0.0090 0 0 3.3871
SIK3:11:117035701-117036150:-1 SIK3 intron 0.2374 0.1715 0 0 1.3249
KRR1:12:75495784-75496863:-1 KRR1 UTR3 0.0393 0.1282 0 0 -1.4371
KRR1:12:75499261-75506890:-1 KRR1 UTR3 0.1199 0.0790 0 0 1.2529
HMGB1:13:30614865-30615327:-1 HMGB1 intron 0.1371 0.0268 0 0 2.5002
CTLA4:2:203873528-203873965:1 CTLA4 UTR3 0.1347 0.0586 0 0 1.2629
CTLA4:2:203868202-203868704:1 CTLA4 intron 0.0905 0.1225 0 0 -1.2534
PABPC1:8:100719368-100719845:-1 PABPC1 intron;exon 0.0896 0.1879 0 0 -1.0481
PTPRC:1:198680020-198680476:1 PTPRC intron 0.0532 0.1256 0 0 -1.2871
HSPH1:13:31159811-31161149:-1 HSPH1 intron 0.1015 0.0259 0 0 1.7874
RGPD2:2:87805535-87805933:-1 RGPD2 intron 0.0282 0.1117 0 0 -1.2834
PTPRC:1:198665713-198666361:1 PTPRC intron 0.0864 0.1640 0 0 -1.0026
ARHGAP15:2:143155606-143156068:1 ARHGAP15 intron;exon 0.1044 0.0390 0 0 1.0383
de_genes_as_tm <- tm_met_tumor.top$gene_name
go_res_as_tm <- enrichGO(de_genes_as_tm, OrgDb = "org.Hs.eg.db", keyType = "SYMBOL" , ont = "BP")
dotplot(go_res_as_tm , showCategory = 15)

Plots of Differentially Alternative Spliced Regions (Memory T Cells)

tm_coq10b.plot <- rownames(subset(tm_met_tumor.top, gene_name == "COQ10B"))
tm_krr1.plot <- rownames(subset(tm_met_tumor.top, gene_name == "KRR1"))
tm_ctla4.plot <- rownames(subset(tm_met_tumor.top, gene_name == "CTLA4"))
PlotRelativeExpressionTSNE(peaks_sce, peaks.to.plot = tm_coq10b.plot)

PlotRelativeExpressionTSNE(peaks_sce, peaks.to.plot = tm_krr1.plot)

PlotRelativeExpressionTSNE(peaks_sce, peaks.to.plot = tm_ctla4.plot)

PlotRelativeExpressionBox(peaks_sce, peaks.to.plot = tm_coq10b.plot)

PlotRelativeExpressionBox(peaks_sce, peaks.to.plot = tm_krr1.plot)

PlotRelativeExpressionBox(peaks_sce, peaks.to.plot = tm_ctla4.plot)

Coverage Plots (Memory T Cells)

outdir = "bam_subsets/"
bam.files_rps27A_tm <- paste0(outdir, c("Memory_T_Cells_Tumor.RPS27A.bam", "Memory_T_Cells_Metastasis.RPS27A.bam"))
bam.files_coq10b_tm <- paste0(outdir, c("Memory_T_Cells_Tumor.COQ10B.bam", "Memory_T_Cells_Metastasis.COQ10B.bam"))
bam.files_krr1_tm <- paste0(outdir, c("Memory_T_Cells_Tumor.KRR1.bam", "Memory_T_Cells_Metastasis.KRR1.bam"))
bam.files_ctla4_tm <- paste0(outdir, c("Memory_T_Cells_Tumor.CTLA4.bam", "Memory_T_Cells_Metastasis.CTLA4.bam"))
PlotCoverage(genome_gr = gtf_gr, 
             geneSymbol = "RPS27A", 
             genome = "hg38", label.transcripts = TRUE,
             bamfiles = bam.files_rps27A_tm,
             bamfile.tracknames=c("Tumor", "Metastasis"),
             peaks.annot = rownames(subset(tm_met_tumor.top, gene_name == "RPS27A")))

PlotCoverage(genome_gr = gtf_gr, 
             geneSymbol = "COQ10B", 
             genome = "hg38", label.transcripts = TRUE,
             bamfiles = bam.files_coq10b_tm,
             bamfile.tracknames=c("Tumor", "Metastasis"),
             peaks.annot = rownames(subset(tm_met_tumor.top, gene_name == "COQ10B")))

PlotCoverage(genome_gr = gtf_gr, 
             geneSymbol = "KRR1", 
             genome = "hg38", label.transcripts = TRUE,
             bamfiles = bam.files_krr1_tm,
             bamfile.tracknames=c("Tumor", "Metastasis"),
             peaks.annot = rownames(subset(tm_met_tumor.top, gene_name == "KRR1")))

PlotCoverage(genome_gr = gtf_gr, 
             geneSymbol = "CTLA4", 
             genome = "hg38", label.transcripts = TRUE,
             bamfiles = bam.files_ctla4_tm,
             bamfile.tracknames=c("Tumor", "Metastasis"),
             peaks.annot = rownames(subset(tm_met_tumor.top, gene_name == "CTLA4")))

Heatmap of Selected Regions with AS Events and Genes With Selected Regions (Memory T Cells)

peak_counts_matrix <- counts(peaks_sce)
colnames(peak_counts_matrix) <- as.character(peaks_sce$CellIdent)
counts_of_selected_regions <- as.matrix(peak_counts_matrix[as.character(c(tm_coq10b.plot,tm_krr1.plot,tm_ctla4.plot)),])
counts_of_selected_regions <- counts_of_selected_regions[,colnames(peak_counts_matrix) %like% 'Tm_Met|Tm_Tumor']

counts_of_selected_regions <- cbind(rowSums(counts_of_selected_regions[,colnames(counts_of_selected_regions) == "Tm_Tumor"]),rowSums(counts_of_selected_regions[,colnames(counts_of_selected_regions) == "Tm_Met"]))
colnames(counts_of_selected_regions) <- c("Memory T Cells (Tumor)" , "Memory T Cells (Metastasis)")
counts_of_selected_regions <- as.data.frame(melt(counts_of_selected_regions))
colnames(counts_of_selected_regions) <- c("Region" , "Cell_Label" , "Counts")

Heatmap of Counts of Genes With Selected Regions (Memory T Cells)

counts_matrix <- counts(merged_sce)
colnames(counts_matrix) <- as.character(peaks_sce$CellIdent)
counts_of_selected_genes <- as.matrix(counts_matrix[as.character(c("COQ10B","KRR1","CTLA4")),])
counts_of_selected_genes <- counts_of_selected_genes[,colnames(counts_matrix) %like% 'Tm_Met|Tm_Tumor']

counts_of_selected_genes <- cbind(rowSums(counts_of_selected_genes[,colnames(counts_of_selected_genes) == "Tm_Tumor"]),rowSums(counts_of_selected_genes[,colnames(counts_of_selected_genes) == "Tm_Met"]))
colnames(counts_of_selected_genes) <- c("Memory T Cells (Tumor)" , "Memory T Cells (Metastasis)")
counts_of_selected_genes <- as.data.frame(melt(counts_of_selected_genes))
colnames(counts_of_selected_genes) <- c("Gene" , "Cell_Label" , "Counts")
p1 <- ggplot(counts_of_selected_genes, aes(Cell_Label, Gene, fill= Counts , label = Counts)) + geom_tile(color = "black") + scale_fill_gradient(low = "white", high = "red") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + geom_text(size = 2)+ theme(legend.position = "none")

p2 <- ggplot(counts_of_selected_regions, aes(Cell_Label, Region, fill= Counts , label = Counts)) + geom_tile(color = "black") + scale_fill_gradient(low = "white", high = "red") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + geom_text(size = 2)+ theme(legend.position = "none")

grid.arrange(p1, p2, ncol = 2)

Differential Expression Analysis between Memory T Cells between Tumor and Metastasis Samples

load("tm_de_results.RData")
results.classified_tm <- DEsingle::DEtype(results = tm_de_results, threshold = 0.01)
results.sig_tm <- results.classified_tm[results.classified_tm$pvalue.adj.FDR < 0.01, ]
results.DEg_tm <- results.sig_tm[results.sig_tm$Type == "DEg", ]
de_genes_tm <- rownames(results.DEg_tm)
go_res_de_tm <- enrichGO(de_genes_tm, OrgDb = "org.Hs.eg.db", keyType = "SYMBOL" , ont = "BP")
dotplot(go_res_de_tm , showCategory = 15)

Comparison of Helper T Cells of Tumor and Metastatsis Samples

th_met_tumor <- DUTest(peaks_sce, 
                    population.1 = "Th_Met", 
                    population.2 = "Th_Tumor",
                    exp.thresh = 0.1, 
                    feature.type = c("UTR3", "exon" , "intron"))
## [1] "5181 expressed peaks in feature types UTR3, exon, intron"
## [1] "1218 genes detected with multiple peak sites expressed"
## [1] "3825 individual peak sites to test"
## [1] "Running DEXSeq test..."
th_met_tumor.top <- subset(th_met_tumor, abs(Log2_fold_change) > 1)

Results Table (T Helper Cells)

kable(th_met_tumor.top, digits = 4)
gene_name genomic_feature(s) population1_pct population2_pct pvalue padj Log2_fold_change
RPS27A:2:55232649-55232899:1 RPS27A UTR5;intron;exon 0.3875 0.6139 0 0 -2.1913
HSPA1B:6:31828867-31829767:1 HSPA1B exon 0.1265 0.4224 0 0 -2.0355
TNFAIP3:6:137868239-137868765:1 TNFAIP3 intron 0.3923 0.4672 0 0 1.1412
HSPA1B:6:31829657-31829729:1 HSPA1B exon 0.0849 0.3474 0 0 -2.2398
HSPA1B:6:31829660-31829709:1 HSPA1B exon 0.0745 0.3251 0 0 -2.3069
CTLA4:2:203873528-203873965:1 CTLA4 UTR3 0.2738 0.0853 0 0 1.0296
CTLA4:2:203868202-203868704:1 CTLA4 intron 0.1089 0.1347 0 0 -1.6298
SERF2:15:43792311-43792377:1 SERF2 UTR5;exon 0.1649 0.3332 0 0 -1.2178
SIK3:11:117035701-117036150:-1 SIK3 intron 0.2706 0.2066 0 0 1.2743
COQ10B:2:197472359-197472832:1 COQ10B intron 0.1153 0.0442 0 0 1.6068
SERF2:15:43792323-43792335:1 SERF2 UTR5;exon 0.0929 0.2275 0 0 -1.3992
HSPD1:2:197512969-197513353:-1 HSPD1 intron 0.1577 0.0488 0 0 2.0118
HLA-DQA1:6:32642964-32643102:1 HLA-DQA1 UTR3 0.2090 0.0632 0 0 1.2836
RSRC2:12:122525695-122526454:-1 RSRC2 UTR5;intron;exon 0.1209 0.0315 0 0 2.0915
ANXA1:9:73167471-73169062:1 ANXA1 intron;exon 0.0785 0.2298 0 0 -1.0252
HMGB1:13:30614865-30615327:-1 HMGB1 intron 0.1393 0.0647 0 0 1.6904
AMBRA1:11:46429015-46429153:-1 AMBRA1 intron 0.0256 0.1014 0 0 -2.1893
LDLRAD4:18:13645330-13645842:1 LDLRAD4 UTR3 0.0384 0.1473 0 0 -1.4014
HSPA8:11:123057857-123058532:-1 HSPA8 intron;exon 0.0945 0.3044 0 0 -1.1599
CYBA:16:88648920-88649322:-1 CYBA intron 0.1025 0.1583 0 0 -1.2269
CYBA:16:88648868-88649464:-1 CYBA intron 0.1073 0.1660 0 0 -1.2370
RPL6:12:112406330-112406921:-1 RPL6 intron;exon 0.1217 0.2448 0 0 -1.0156
ELOVL5:6:53294679-53295146:-1 ELOVL5 intron 0.0256 0.1153 0 0 -1.7868
SCML4:6:107704636-107705095:-1 SCML4 UTR3 0.0608 0.1056 0 0 -1.0972
HSP90AA1:14:102085986-102086154:-1 HSP90AA1 UTR3 0.0440 0.1587 0 0 -1.4266
RPS13:11:17077411-17077709:-1 RPS13 UTR5;intron;exon 0.0424 0.1212 0 0 -1.4533
SARAF:8:30063445-30063799:-1 SARAF UTR3 0.0232 0.1236 0 0 -1.6024
SPOCK2:10:72086250-72086830:-1 SPOCK2 UTR3 0.0336 0.1035 0 0 -1.2780
SQSTM1:5:179836655-179836853:1 SQSTM1 UTR3 0.0520 0.1635 0 0 -1.1312
KTN1:14:55580202-55580250:1 KTN1 UTR5;intron;exon 0.0368 0.1319 0 0 -1.1552
BCL11B:14:99272050-99272197:-1 BCL11B UTR5;exon 0.0320 0.1156 0 0 -1.1209
de_genes_as_th <- th_met_tumor.top$gene_name
go_res_as_th <- enrichGO(de_genes_as_th, OrgDb = "org.Hs.eg.db", keyType = "SYMBOL" , ont = "BP")
barplot(go_res_as_th , showCategory = 20)

Plots of Differentially Alternative Spliced Regions (T Helper Cells)

th_cyba.plot <- rownames(subset(th_met_tumor.top, gene_name == "CYBA"))
th_ctla4.plot <- rownames(subset(th_met_tumor.top, gene_name == "CTLA4"))
th_hspa1b.plot <- rownames(subset(th_met_tumor.top, gene_name == "HSPA1B"))
PlotRelativeExpressionTSNE(peaks_sce, peaks.to.plot = th_cyba.plot)

PlotRelativeExpressionTSNE(peaks_sce, peaks.to.plot = th_ctla4.plot)

PlotRelativeExpressionTSNE(peaks_sce, peaks.to.plot = th_hspa1b.plot)

PlotRelativeExpressionBox(peaks_sce, peaks.to.plot = th_cyba.plot)

PlotRelativeExpressionBox(peaks_sce, peaks.to.plot = th_ctla4.plot)

PlotRelativeExpressionBox(peaks_sce, peaks.to.plot = th_hspa1b.plot)

Coverage Plots (Helper T Cells)

outdir = "bam_subsets/"
bam.files_hspa1b_th <- paste0(outdir, c("Helper_T_Cells_Tumor.HSPA1B.bam", "Helper_T_Cells_Metastasis.HSPA1B.bam"))
bam.files_rps27a_th <- paste0(outdir, c("Helper_T_Cells_Tumor.RPS27A.bam", "Helper_T_Cells_Metastasis.RPS27A.bam"))
bam.files_ctla4_th <- paste0(outdir, c("Helper_T_Cells_Tumor.CTLA4.bam", "Helper_T_Cells_Metastasis.CTLA4.bam"))
bam.files_cyba_th <- paste0(outdir, c("Helper_T_Cells_Tumor.CYBA.bam", "Helper_T_Cells_Metastasis.CYBA.bam"))
bam.files_ambra_th <- paste0(outdir, c("Helper_T_Cells_Tumor.AMBRA1.bam", "Helper_T_Cells_Metastasis.AMBRA1.bam"))
PlotCoverage(genome_gr = gtf_gr, 
             geneSymbol = "HSPA1B", 
             genome = "hg38", label.transcripts = TRUE,
             bamfiles = bam.files_hspa1b_th,
             bamfile.tracknames=c("Tumor", "Metastasis"),
             peaks.annot = rownames(subset(th_met_tumor.top, gene_name == "HSPA1B")))

PlotCoverage(genome_gr = gtf_gr, 
             geneSymbol = "RPS27A", 
             genome = "hg38", label.transcripts = TRUE,
             bamfiles = bam.files_rps27a_th,
             bamfile.tracknames=c("Tumor", "Metastasis"),
             peaks.annot = rownames(subset(th_met_tumor.top, gene_name == "RPS27A")))

PlotCoverage(genome_gr = gtf_gr, 
             geneSymbol = "CTLA4", 
             genome = "hg38", label.transcripts = TRUE,
             bamfiles = bam.files_ctla4_th,
             bamfile.tracknames=c("Tumor", "Metastasis") , peaks.annot = th_ctla4.plot)

PlotCoverage(genome_gr = gtf_gr, 
             geneSymbol = "CYBA", 
             genome = "hg38", label.transcripts = TRUE,
             bamfiles = bam.files_cyba_th,
             bamfile.tracknames=c("Tumor", "Metastasis") , peaks.annot = th_cyba.plot)

PlotCoverage(genome_gr = gtf_gr, 
             geneSymbol = "AMBRA1", 
             genome = "hg38", label.transcripts = TRUE,
             bamfiles = bam.files_ambra_th,
             bamfile.tracknames=c("Tumor", "Metastasis") , 
             peaks.annot = rownames(subset(th_met_tumor.top, gene_name == "AMBRA1")))

Heatmap of Selected Regions with AS Events and Genes With Selected Regions (T Helper Cells)

peak_counts_matrix <- counts(peaks_sce)
colnames(peak_counts_matrix) <- as.character(peaks_sce$CellIdent)
counts_of_selected_regions <- as.matrix(peak_counts_matrix[as.character(c(th_cyba.plot,th_ctla4.plot)),])
counts_of_selected_regions <- counts_of_selected_regions[,colnames(peak_counts_matrix) %like% 'Th_Met|Th_Tumor']

counts_of_selected_regions <- cbind(rowSums(counts_of_selected_regions[,colnames(counts_of_selected_regions) == "Th_Tumor"]),rowSums(counts_of_selected_regions[,colnames(counts_of_selected_regions) == "Th_Met"]))
colnames(counts_of_selected_regions) <- c("Helper T Cells (Tumor)" , "Helper T Cells (Metastasis)")
counts_of_selected_regions <- as.data.frame(melt(counts_of_selected_regions))
colnames(counts_of_selected_regions) <- c("Region" , "Cell_Label" , "Counts")

Heatmap of Counts of Genes With Selected Regions (T Helper Cells)

counts_matrix <- counts(merged_sce)
colnames(counts_matrix) <- as.character(peaks_sce$CellIdent)
counts_of_selected_genes <- as.matrix(counts_matrix[as.character(c("CYBA","CTLA4")),])
counts_of_selected_genes <- counts_of_selected_genes[,colnames(counts_matrix) %like% 'Th_Met|Th_Tumor']

counts_of_selected_genes <- cbind(rowSums(counts_of_selected_genes[,colnames(counts_of_selected_genes) == "Th_Tumor"]),rowSums(counts_of_selected_genes[,colnames(counts_of_selected_genes) == "Th_Met"]))
colnames(counts_of_selected_genes) <- c("Helper T Cells (Tumor)" , "Helper T Cells (Metastasis)")
counts_of_selected_genes <- as.data.frame(melt(counts_of_selected_genes))
colnames(counts_of_selected_genes) <- c("Gene" , "Cell_Label" , "Counts")
p1 <- ggplot(counts_of_selected_genes, aes(Cell_Label, Gene, fill= Counts , label = Counts)) + geom_tile(color = "black") + scale_fill_gradient(low = "white", high = "red") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + geom_text(size = 2)+ theme(legend.position = "none")

p2 <- ggplot(counts_of_selected_regions, aes(Cell_Label, Region, fill= Counts , label = Counts)) + geom_tile(color = "black") + scale_fill_gradient(low = "white", high = "red") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + geom_text(size = 2)+ theme(legend.position = "none")

grid.arrange(p1, p2, ncol = 2)

Differential Expression Analysis between Helper T Cells between Tumor and Metastasis Samples

load("th_de_results.RData")
results.classified_th <- DEsingle::DEtype(results = th_de_results, threshold = 0.01)
results.sig_th <- results.classified_th[results.classified_th$pvalue.adj.FDR < 0.01, ]
results.DEg_th <- results.sig_th[results.sig_th$Type == "DEg", ]
de_genes_th <- rownames(results.DEg_th)
go_res_de_th <- enrichGO(de_genes_th, OrgDb = "org.Hs.eg.db", keyType = "SYMBOL" , ont = "BP")
barplot(go_res_de_th , showCategory = 20)

Tumor and Metastatis Sample Comparisons (Epithelial Cells)

Comparison of Epithelial Cells of Tumor and Metastatsis Samples

ept_met_tumor <- DUTest(peaks_sce, 
                    population.1 = "EPITHELIAL_Met", 
                    population.2 = "EPITHELIAL_Tumor",
                    exp.thresh = 0.1, 
                    feature.type = c("UTR3", "exon" , "intron"))
## [1] "20514 expressed peaks in feature types UTR3, exon, intron"
## [1] "4275 genes detected with multiple peak sites expressed"
## [1] "16774 individual peak sites to test"
## [1] "Running DEXSeq test..."
ept_met_tumor.top <- subset(ept_met_tumor, abs(Log2_fold_change) > 1)

Results Table (Epithelial Cells) (First 50 results)

kable(ept_met_tumor.top[1:50,], digits = 4, "simple" , n = 100)
gene_name genomic_feature(s) population1_pct population2_pct pvalue padj Log2_fold_change
TXNRD1:12:104265566-104265807:1 TXNRD1 intron 0.7299 0.5766 0 0 -3.4320
AKR1C3:10:5064493-5064931:1 AKR1C3 intron 0.3721 0.0007 0 0 10.3704
TNFRSF21:6:47231532-47231881:-1 TNFRSF21 UTR3 0.3751 0.3197 0 0 -1.1936
AKR1C3:10:5068088-5068520:1 AKR1C3 intron 0.1733 0.0013 0 0 7.5762
SAT1:X:23785766-23786210:1 SAT1 UTR3 0.8909 0.6830 0 0 -1.0599
SAT1:X:23783583-23784004:1 SAT1 intron;exon 0.8593 0.6189 0 0 1.0556
RBM47:4:40423267-40423614:-1 RBM47 UTR3 0.5474 0.2972 0 0 -1.3072
C4orf3:4:119296995-119297488:-1 C4orf3 UTR3 0.4781 0.1308 0 0 1.4003
TYMP:22:50525752-50526123:-1 TYMP UTR3 0.3700 0.2576 0 0 -2.0714
CAPN2:1:223714205-223714634:1 CAPN2 intron 0.3976 0.0812 0 0 1.7690
AK2:1:33009827-33010425:-1 AK2 UTR3 0.4077 0.1083 0 0 1.6827
CFH:1:196689578-196701616:1 CFH UTR3 0.1366 0.1486 0 0 1.4370
ABCC3:17:50691160-50691814:1 ABCC3 UTR3 0.2375 0.2880 0 0 -1.7623
FOSB:19:45474146-45474668:1 FOSB UTR3 0.1427 0.1612 0 0 -2.2044
PPL:16:4882507-4882896:-1 PPL UTR3 0.2212 0.1413 0 0 -1.5160
HSBP1:16:83812556-83813069:1 HSBP1 UTR3 0.5321 0.1532 0 0 1.2173
CDH1:16:68835104-68835537:1 CDH1 UTR3 0.5892 0.4597 0 0 -1.0707
IP6K2:3:48688003-48688337:-1 IP6K2 UTR3 0.4312 0.1275 0 0 1.2623
SEMA4B:15:90229194-90229679:1 SEMA4B UTR3 0.4128 0.1955 0 0 -1.3951
NEDD4L:18:58254140-58254584:1 NEDD4L intron;exon 0.3935 0.1810 0 0 -1.3637
RNF145:5:159209141-159209693:-1 RNF145 UTR5;intron;exon 0.1886 0.2147 0 0 -2.1484
RNF145:5:159209204-159209588:-1 RNF145 UTR5;intron;exon 0.1876 0.2147 0 0 -2.1532
NXN:17:799310-799706:-1 NXN UTR3 0.2396 0.1017 0 0 -2.1063
CCND3:6:41991363-41991895:-1 CCND3 intron 0.5413 0.0641 0 0 1.5974
ACO2:22:41520131-41521616:1 ACO2 UTR3 0.3660 0.0482 0 0 2.1742
CCT3:1:156336901-156337361:-1 CCT3 UTR3 0.4628 0.0826 0 0 1.6287
TIMP3:22:32802080-32802541:1 TIMP3 intron;exon 0.4577 0.1030 0 0 -2.2879
CFDP1:16:75394579-75395281:-1 CFDP1 intron;exon 0.1305 0.0694 0 0 2.8660
ADNP:20:50890299-50890786:-1 ADNP UTR3 0.1702 0.1242 0 0 -2.8782
TXNRD1:12:104299350-104299794:1 TXNRD1 intron 0.3996 0.0271 0 0 1.4589
IP6K2:3:48693180-48694439:-1 IP6K2 UTR3 0.0663 0.1354 0 0 -2.2391
TXNRD1:12:104328229-104328823:1 TXNRD1 intron 0.3272 0.0086 0 0 2.4777
CYP24A1:20:54153446-54153804:-1 CYP24A1 UTR3 0.1978 0.0984 0 0 -1.5397
IP6K2:3:48693407-48693920:-1 IP6K2 UTR3 0.0601 0.1288 0 0 -2.2998
H2BC5:6:26170917-26171349:1 H2BC5 UTR3 0.2446 0.1691 0 0 -1.2526
TXNRD1:12:104329243-104329651:1 TXNRD1 intron 0.3364 0.0125 0 0 2.0541
ATP1A1:1:116376343-116376833:1 ATP1A1 intron 0.1193 0.1374 0 0 -1.8767
TUBB:6:30724969-30725454:1 TUBB UTR3 0.2457 0.2292 0 0 -1.0586
CLIP1:12:122271432-122271835:-1 CLIP1 UTR3 0.2406 0.1123 0 0 -1.6847
ATXN1:6:16432559-16486141:-1 ATXN1 UTR5;intron;exon 0.2324 0.1579 0 0 -1.4013
AKR1C1:10:4965849-4967061:1 AKR1C1 intron;exon 0.4893 0.1374 0 0 -1.3273
GALNT2:1:230281660-230282122:1 GALNT2 UTR3 0.2508 0.1222 0 0 -1.4192
SEPTIN9:17:77500158-77500596:1 SEPTIN9 UTR3 0.6422 0.3402 0 0 -1.4131
OGT:X:71575399-71575892:1 OGT UTR3 0.3537 0.1526 0 0 -1.1490
ACO2:22:41528459-41529030:1 ACO2 UTR3 0.2895 0.2305 0 0 -1.0308
PFKP:10:3100441-3100893:1 PFKP intron 0.2161 0.0588 0 0 1.8798
YBX3:12:10717763-10719195:-1 YBX3 intron;exon 0.4047 0.0680 0 0 1.5539
ACO2:22:41528002-41529041:1 ACO2 UTR3 0.2905 0.2312 0 0 -1.0231
TUBB2B:6:3224255-3224699:-1 TUBB2B UTR3 0.2049 0.0092 0 0 3.3602
TXNRD1:12:104298343-104298727:1 TXNRD1 intron 0.4128 0.0211 0 0 1.4842
de_genes_as_ept <- ept_met_tumor.top$gene_name
go_res_as_ept <- enrichGO(de_genes_as_ept, OrgDb = "org.Hs.eg.db", keyType = "SYMBOL" , ont = "BP")
barplot(go_res_as_ept , showCategory = 20)

Plots of Differentially Alternative Spliced Regions (Epithelial Cells)

ept_akr1c3.plot <- rownames(subset(ept_met_tumor.top, gene_name == "AKR1C3"))
ept_aco2.plot <- rownames(subset(ept_met_tumor.top, gene_name == "ACO2"))
ept_akr1c1.plot <- rownames(subset(ept_met_tumor.top, gene_name == "AKR1C1"))
ept_auts2.plot <- rownames(subset(ept_met_tumor.top, gene_name == "AUTS2")) 
ept_txnrd1.plot <- rownames(subset(ept_met_tumor.top, gene_name == "TXNRD1")) 
PlotRelativeExpressionTSNE(peaks_sce, peaks.to.plot = ept_akr1c3.plot)

PlotRelativeExpressionTSNE(peaks_sce, peaks.to.plot = ept_aco2.plot)

PlotRelativeExpressionTSNE(peaks_sce, peaks.to.plot = ept_akr1c1.plot)

PlotRelativeExpressionTSNE(peaks_sce, peaks.to.plot = ept_auts2.plot)

PlotRelativeExpressionTSNE(peaks_sce, peaks.to.plot = ept_txnrd1.plot)

PlotRelativeExpressionBox(peaks_sce, peaks.to.plot = ept_akr1c3.plot)

PlotRelativeExpressionBox(peaks_sce, peaks.to.plot = ept_aco2.plot)

PlotRelativeExpressionBox(peaks_sce, peaks.to.plot = ept_akr1c1.plot)

PlotRelativeExpressionBox(peaks_sce, peaks.to.plot = ept_auts2.plot)

PlotRelativeExpressionBox(peaks_sce, peaks.to.plot = ept_txnrd1.plot)

Coverage Plots (Epithelial Cells)

outdir = "bam_subsets/"
bam.files_txnrd1_ept <- paste0(outdir, c("Epithelial_Cells_Tumor.TXNRD1.bam", "Epithelial_Cells_Metastasis.TXNRD1.bam"))
bam.files_akr1c3_ept <- paste0(outdir, c("Epithelial_Cells_Tumor.AKR1C3.bam", "Epithelial_Cells_Metastasis.AKR1C3.bam"))
bam.files_sat1_ept <- paste0(outdir, c("Epithelial_Cells_Tumor.SAT1.bam", "Epithelial_Cells_Metastasis.SAT1.bam"))
bam.files_fosb_ept <- paste0(outdir, c("Epithelial_Cells_Tumor.FOSB.bam", "Epithelial_Cells_Metastasis.FOSB.bam"))
bam.files_aco2_ept <- paste0(outdir, c("Epithelial_Cells_Tumor.ACO2.bam", "Epithelial_Cells_Metastasis.ACO2.bam"))
PlotCoverage(genome_gr = gtf_gr, 
             geneSymbol = "TXNRD1", 
             genome = "hg38", label.transcripts = TRUE,
             bamfiles = bam.files_txnrd1_ept,
             bamfile.tracknames=c("Tumor", "Metastasis"),
             peaks.annot = rownames(subset(ept_met_tumor.top, gene_name == "TXNRD1")))

PlotCoverage(genome_gr = gtf_gr, 
             geneSymbol = "AKR1C3", 
             genome = "hg38", label.transcripts = TRUE,
             bamfiles = bam.files_akr1c3_ept,
             bamfile.tracknames=c("Tumor", "Metastasis"),
             peaks.annot = rownames(subset(ept_met_tumor.top, gene_name == "AKR1C3")))

PlotCoverage(genome_gr = gtf_gr, 
             geneSymbol = "SAT1", 
             genome = "hg38", label.transcripts = TRUE,
             bamfiles = bam.files_sat1_ept,
             bamfile.tracknames=c("Tumor", "Metastasis"),
             peaks.annot = rownames(subset(ept_met_tumor.top, gene_name == "SAT1")))

PlotCoverage(genome_gr = gtf_gr, 
             geneSymbol = "FOSB", 
             genome = "hg38", label.transcripts = TRUE,
             bamfiles = bam.files_fosb_ept,
             bamfile.tracknames=c("Tumor", "Metastasis"),
             peaks.annot = rownames(subset(ept_met_tumor.top, gene_name == "FOSB")))

# PlotCoverage(genome_gr = gtf_gr, 
#               geneSymbol = "ACO2", 
#               genome = "hg38", label.transcripts = TRUE,
#               bamfiles = bam.files_aco2_ept,
#               bamfile.tracknames=c("Tumor", "Metastasis"),
#               peaks.annot = rownames(subset(ept_met_tumor.top, gene_name == "ACO2")))

Heatmap of Selected Regions with AS Events and Genes With Selected Regions (Epithelial Cells)

peak_counts_matrix <- counts(peaks_sce)
colnames(peak_counts_matrix) <- as.character(peaks_sce$CellIdent)
counts_of_selected_regions <- as.matrix(peak_counts_matrix[as.character(c(ept_akr1c3.plot,ept_aco2.plot,ept_akr1c1.plot,
                                                                          ept_auts2.plot,
                                                                          ept_txnrd1.plot)),])
counts_of_selected_regions <- counts_of_selected_regions[,colnames(peak_counts_matrix) %like% 'EPITHELIAL_Met|EPITHELIAL_Tumor']

counts_of_selected_regions <- cbind(rowSums(counts_of_selected_regions[,colnames(counts_of_selected_regions) == "EPITHELIAL_Tumor"]),rowSums(counts_of_selected_regions[,colnames(counts_of_selected_regions) == "EPITHELIAL_Met"]))
colnames(counts_of_selected_regions) <- c("Epithelial Cells (Tumor)" , "Epithelial Cells (Metastasis)")
counts_of_selected_regions <- as.data.frame(melt(counts_of_selected_regions))
colnames(counts_of_selected_regions) <- c("Region" , "Cell_Label" , "Counts")

Heatmap of Counts of Genes With Selected Regions (Epithelial Cells)

counts_matrix <- counts(merged_sce)
colnames(counts_matrix) <- as.character(peaks_sce$CellIdent)
counts_of_selected_genes <- as.matrix(counts_matrix[as.character(c("AKR1C3","ACO2","AKR1C1","AUTS2","TXNRD1")),])
counts_of_selected_genes <- counts_of_selected_genes[,colnames(counts_matrix) %like% 'EPITHELIAL_Met|EPITHELIAL_Tumor']

counts_of_selected_genes <- cbind(rowSums(counts_of_selected_genes[,colnames(counts_of_selected_genes) == "EPITHELIAL_Tumor"]),rowSums(counts_of_selected_genes[,colnames(counts_of_selected_genes) == "EPITHELIAL_Met"]))
colnames(counts_of_selected_genes) <- c("Epithelial Cells (Tumor)" , "Epithelial Cells (Metastasis)")
counts_of_selected_genes <- as.data.frame(melt(counts_of_selected_genes))
colnames(counts_of_selected_genes) <- c("Gene" , "Cell_Label" , "Counts")
p1 <- ggplot(counts_of_selected_genes, aes(Cell_Label, Gene, fill= Counts , label = Counts)) + geom_tile(color = "black") + scale_fill_gradient(low = "white", high = "red") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + geom_text(size = 2)+ theme(legend.position = "none")

p2 <- ggplot(counts_of_selected_regions, aes(Cell_Label, Region, fill= Counts , label = Counts)) + geom_tile(color = "black") + scale_fill_gradient(low = "white", high = "red") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + geom_text(size = 2)+ theme(legend.position = "none")

grid.arrange(p1, p2, ncol = 2)

Differential Expression Analysis between Epithelial Cells between Tumor and Metastasis Samples

load("ept_de_results.RData")
results.classified_ept <- DEsingle::DEtype(results = ept_de_results, threshold = 0.01)
results.sig_ept <- results.classified_ept[results.classified_ept$pvalue.adj.FDR < 0.01, ]
results.DEg_ept <- results.sig_ept[results.sig_ept$Type == "DEg", ]
de_genes_ept <- rownames(results.DEg_ept)
go_res_de_ept <- enrichGO(de_genes_ept, OrgDb = "org.Hs.eg.db", keyType = "SYMBOL" , ont = "BP")
barplot(go_res_de_ept , showCategory = 20)

SUBCLUSTER AND TRAJECTORY ANALYSIS (Memory T Cells)

peak_counts_matrix <- counts(peaks_sce)
colnames(peak_counts_matrix) <- as.character(peaks_sce$CellIdent)
tm_sce_peak_counts <- counts(peaks_sce)[,colnames(peak_counts_matrix) %like% 'Tm_Met|Tm_Tumor']
tm_sce_peak_counts <- tm_sce_peak_counts[rowSums(tm_sce_peak_counts) > 7,]

tm_sce_peak <- SingleCellExperiment(assays = list(counts = tm_sce_peak_counts))
tm_sce_peak$cell_labels <- as.character(peaks_sce$CellIdent)[colnames(peak_counts_matrix) %like% 'Tm_Met|Tm_Tumor']

clusters <- quickCluster(tm_sce_peak , method = "igraph")
tm_sce_peak <- computeSumFactors(tm_sce_peak , cluster=clusters)
tm_sce_peak$clusters <- clusters
tm_sce_peak <- logNormCounts(tm_sce_peak)
dec <- modelGeneVarByPoisson(tm_sce_peak)
top <- getTopHVGs(dec , n = 4000)
tm_sce_peak <- denoisePCA(tm_sce_peak, subset.row=top, technical=dec)
tm_sce_peak <- runTSNE(tm_sce_peak)
tm_sce_peak <- runUMAP(tm_sce_peak)
plotPCA(tm_sce_peak , colour_by = "cell_labels")

plotUMAP(tm_sce_peak , colour_by = "cell_labels")

plotTSNE(tm_sce_peak , colour_by = "cell_labels")

library(slingshot)
library(RColorBrewer)
tm_sce_peak <- slingshot(tm_sce_peak, reducedDim='PCA' , clusterLabels = as.character(tm_sce_peak$cell_labels) , start.clus = "Tm_Tumor" , end.clus = "Tm_Met")

embedded <- embedCurves(tm_sce_peak, "UMAP")
embedded <- slingCurves(embedded)[[1]]
embedded <- data.frame(embedded$s[embedded$ord,])
plotUMAP(tm_sce_peak, colour_by="slingPseudotime_1") +
  geom_path(data=embedded, aes(x=Dim.1, y=Dim.2), linewidth=1)

library(tradeSeq)

# fit negative binomial GAM
sce <- fitGAM(counts = as.matrix(counts(tm_sce_peak)[top,]), sds = as.SlingshotDataSet(tm_sce_peak))
colnames(sce) <- tm_sce_peak$cell_labels
# test for dynamic expression
ATres_tm <- associationTest(sce)
ATres_tm <- ATres_tm[!(is.na(ATres_tm$pvalue)),]
ATres_tm$padj <- p.adjust(ATres_tm$pvalue)
ATres_tm <- ATres_tm[ATres_tm$meanLogFC > 1,]
topgenes <- rownames(ATres_tm[order(ATres_tm$padj), ])[1:50]
pst.ord <- order(tm_sce_peak$slingPseudotime_1, na.last = NA)
heatdata <- assays(tm_sce_peak)$counts[topgenes, pst.ord]
heatclus <- tm_sce_peak$cell_labels[pst.ord]
colnames(heatdata) <- tm_sce_peak$cell_labels
heatdata <- heatdata[ , order(colnames(heatdata))]
heatmap(log1p(as.matrix(heatdata)), Colv = NA, Rowv = NA, ColSideColors = brewer.pal(9,"Set1")[heatclus])

kable(ATres_tm[1:50,], digits = 4, "simple" , n = 100)
waldStat df pvalue meanLogFC padj
MT-CO1:MT:7034-7445:1 4564.5721 5 0 2.1038 0
HSPA1A:6:31817504-31817946:1 764.4294 5 0 3.0246 0
MT-CO1:MT:6504-7267:1 4118.2110 5 0 2.0657 0
JUNB:19:12792890-12793315:1 3608.8050 5 0 1.2618 0
HSP90AA1:14:102084861-102086016:-1 702.9817 5 0 3.0941 0
RPS26:12:56042140-56044274:1 1241.6470 5 0 5.4014 0
FTH1:11:61964510-61965102:-1 3398.9904 5 0 1.9014 0
MT-CO1:MT:6652-7208:1 3540.5284 5 0 1.9577 0
MT-CO3:MT:9540-9990:1 1455.3090 5 0 2.4399 0
RGS1:1:192579470-192580024:1 2323.4696 5 0 3.6202 0
ACTB:7:5527137-5527581:-1 1081.7246 5 0 5.4080 0
MT-CO2:MT:7859-8269:1 1783.8240 5 0 2.2161 0
MT-ND2:MT:5086-5511:1 1355.7948 5 0 2.4167 0
CREM:10:35206785-35212140:1 3494.5127 5 0 2.0962 0
KLRB1:12:9595207-9599795:-1 104.7249 5 0 4.8859 0
CCL5:17:35871491-35871840:-1 777.7726 5 0 5.4304 0
MT-CYB:MT:15450-15887:1 1230.2569 5 0 2.3603 0
MT-ND3:MT:10059-10404:1 1596.9076 5 0 2.1687 0
BTG1:12:92143918-92144376:-1 4292.1723 5 0 2.2845 0
MT-ATP6:MT:8805-9207:1 1446.6504 5 0 2.1098 0
LTB:6:31580525-31580955:-1 537.3039 5 0 5.7379 0
S100A4:1:153543613-153544685:-1 432.6943 5 0 6.4740 0
S100A4:1:153543613-153543921:-1 439.8091 5 0 6.4695 0
MT-ND4:MT:11699-12137:1 1720.9808 5 0 2.2563 0
SRGN:10:69104309-69104805:1 3460.9507 5 0 1.2236 0
MT-ND1:MT:3804-4262:1 1449.1800 5 0 2.7541 0
MT-ND4:MT:11137-11875:1 1576.1172 5 0 2.1683 0
ZFP36:19:39408952-39409412:1 3429.5910 5 0 2.1000 0
CCL4:17:36104582-36105621:1 533.9682 5 0 5.7116 0
CCL4:17:36105200-36105621:1 534.0629 5 0 5.7297 0
ZFP36L2:2:43224000-43224428:-1 2622.9707 5 0 1.1621 0
RPL3:22:39312882-39313702:-1 8488.9454 5 0 2.9296 0
RPL3:22:39312882-39313309:-1 5859.5678 5 0 2.9356 0
IL32:16:3069102-3069591:1 759.4712 5 0 6.0178 0
RPL21:13:27253840-27256629:1 6131.3842 5 0 3.7306 0
RPS12:6:132814970-132817564:1 6279.4670 5 0 3.7757 0
EEF1A1:6:73517465-73518061:-1 5637.5941 5 0 3.0648 0
RPL21:13:27255251-27256597:1 4521.8221 5 0 3.7546 0
TSC22D3:X:107713221-107713546:-1 1598.6706 5 0 1.7981 0
RPS6:9:19376184-19378727:-1 5755.8542 5 0 3.4574 0
RPS18:6:33272694-33276511:1 6911.1627 5 0 4.1537 0
RPS18:6:33275780-33276511:1 10814.0514 5 0 4.1404 0
BTG1:12:92143068-92143573:-1 3145.8809 5 0 2.6782 0
MT-ND2:MT:4733-5189:1 955.6573 5 0 1.8467 0
TPT1:13:45337122-45340004:-1 3195.3669 5 0 3.4640 0
RPS27:1:153990762-153992155:1 4373.4251 5 0 4.2233 0
MT-CYB:MT:15078-15576:1 1088.1994 5 0 1.9802 0
RPS29:14:49583524-49586533:-1 2518.5700 5 0 4.4243 0
RPL32:3:12835989-12840215:-1 6748.9485 5 0 3.2966 0
TMSB10:2:84905667-84906671:1 692.9494 5 0 3.9375 0
de_genes_pseudo_tm <- t(as.data.frame(str_split(rownames(ATres_tm),":")))[,1]
go_res_pseudo_tm <- enrichGO(de_genes_pseudo_tm, OrgDb = "org.Hs.eg.db", keyType = "SYMBOL" , ont = "BP")
barplot(go_res_pseudo_tm , showCategory = 20)

SUBCLUSTER ANALYSIS (Epithelial T Cells (Normal - Tumor - Metastasis))

peak_counts_matrix <- counts(peaks_sce)
colnames(peak_counts_matrix) <- as.character(peaks_sce$CellIdent)
ept_sce_peak_counts <- counts(peaks_sce)[,colnames(peak_counts_matrix) %like% 'EPITHELIAL_Met|EPITHELIAL_Tumor|EPITHELIAL_Normal']
ept_sce_peak_counts <- ept_sce_peak_counts[rowSums(ept_sce_peak_counts) > 7,]

ept_sce_peak <- SingleCellExperiment(assays = list(counts = ept_sce_peak_counts))
ept_sce_peak$cell_labels <- as.character(peaks_sce$CellIdent)[colnames(peak_counts_matrix) %like% 'EPITHELIAL_Met|EPITHELIAL_Tumor|EPITHELIAL_Normal']

clusters <- quickCluster(ept_sce_peak , method = "igraph")
ept_sce_peak <- computeSumFactors(ept_sce_peak , cluster=clusters)
ept_sce_peak$clusters <- clusters
ept_sce_peak <- logNormCounts(ept_sce_peak)
dec <- modelGeneVarByPoisson(ept_sce_peak)
top <- getTopHVGs(dec , n = 4000)
ept_sce_peak <- denoisePCA(ept_sce_peak, subset.row=top, technical=dec)
ept_sce_peak <- runTSNE(ept_sce_peak)
ept_sce_peak <- runUMAP(ept_sce_peak)
plotPCA(ept_sce_peak , colour_by = "cell_labels")

plotUMAP(ept_sce_peak , colour_by = "cell_labels")

plotTSNE(ept_sce_peak , colour_by = "cell_labels")

library(slingshot)
library(RColorBrewer)
ept_sce_peak <- slingshot(ept_sce_peak, reducedDim='PCA' , clusterLabels = as.character(ept_sce_peak$cell_labels) , start.clus = "EPITHELIAL_Normal" , end.clus = "EPITHELIAL_Met")

embedded <- embedCurves(ept_sce_peak, "UMAP")
embedded <- slingCurves(embedded)[[1]]
embedded <- data.frame(embedded$s[embedded$ord,])
plotUMAP(ept_sce_peak, colour_by="slingPseudotime_1") +
  geom_path(data=embedded, aes(x=Dim.1, y=Dim.2), linewidth=1)

library(tradeSeq)

# fit negative binomial GAM
sce <- fitGAM(counts = as.matrix(counts(ept_sce_peak)[top,]), sds = as.SlingshotDataSet(ept_sce_peak))
colnames(sce) <- ept_sce_peak$cell_labels
# test for dynamic expression
ATres_ept <- associationTest(sce)
ATres_ept <- ATres_ept[!(is.na(ATres_ept$pvalue)),]
ATres_ept$padj <- p.adjust(ATres_ept$pvalue)
ATres_ept <- ATres_ept[ATres_ept$meanLogFC > 1,]
topgenes <- rownames(ATres_ept[order(ATres_ept$pvalue), ])[1:100]
pst.ord <- order(ept_sce_peak$slingPseudotime_1, na.last = NA)
heatdata <- assays(ept_sce_peak)$counts[topgenes, pst.ord]
heatclus <- ept_sce_peak$cell_labels[pst.ord]
colnames(heatdata) <- ept_sce_peak$cell_labels
heatdata <- heatdata[ , order(colnames(heatdata))]
heatmap(log1p(as.matrix(heatdata)), Colv = NA, Rowv = NA, ColSideColors = brewer.pal(9,"Set1")[heatclus])

kable(ATres_ept[1:50,], digits = 4, "simple" , n = 100)
waldStat df pvalue meanLogFC padj
PGC:6:41736711-41737000:-1 1029.5933 5 0 1.5904 0
SCGB3A1:5:180590105-180590774:-1 2490.0308 5 0 4.1040 0
SFTPC:8:22163887-22164133:1 6174.9340 5 0 7.9799 0
SFTPB:2:85658562-85662231:-1 5669.4410 5 0 2.5528 0
SCGB1A1:11:62422196-62423195:1 16337.3109 5 0 6.3240 0
WFDC2:20:45470423-45481532:1 1706.8031 5 0 1.1953 0
SFTPA2:10:79555852-79556268:-1 979.6923 5 0 5.6949 0
SPINK1:5:147824572-147829629:-1 695.4665 5 0 1.5376 0
NAPSA:19:50358477-50359061:-1 1794.6798 5 0 3.0409 0
NAPSA:19:50358477-50358777:-1 2824.0092 5 0 3.0380 0
SFTPA1:10:79614990-79615455:1 2479.2673 5 0 6.5906 0
S100P:4:6693898-6697170:1 3101.9756 5 0 5.3582 0
SFTPB:2:85658150-85658822:-1 1828.9203 5 0 2.9809 0
SPINK4:9:33240166-33248567:1 474.3935 5 0 5.8297 0
CD74:5:150401637-150401973:-1 2657.4888 5 0 1.9319 0
S100P:4:6696824-6697170:1 2980.8485 5 0 5.2431 0
HOPX:4:56647988-56648340:-1 1621.3886 5 0 1.7282 0
IFI27:14:94114852-94116695:1 1225.0653 5 0 2.1889 0
IGFBP2:2:216663997-216664436:1 2473.7947 5 0 2.2276 0
IFI27:14:94115775-94116695:1 1409.7879 5 0 2.1720 0
LCN2:9:128151666-128153453:1 622.9587 5 0 2.3461 0
NPC2:14:74479885-74484466:-1 3508.9912 5 0 2.5386 0
HLA-DRA:6:32443855-32445046:1 5027.2332 5 0 2.2103 0
SFTPB:2:85657606-85658145:-1 1625.5975 5 0 2.8301 0
HLA-DRA:6:32444621-32445046:1 3081.8746 5 0 2.2134 0
RNASE1:14:20801310-20801788:-1 1516.6012 5 0 2.5578 0
RPS6:9:19376184-19378727:-1 4606.4075 5 0 1.0665 0
CYB5A:18:74253272-74253668:-1 3007.7724 5 0 1.6456 0
AKR1C1:10:4972618-4978020:1 2103.8101 5 0 2.3831 0
AKR1C1:10:4975759-4977982:1 682.7853 5 0 2.3953 0
PIGR:1:206928522-206928878:-1 354.2333 5 0 1.4147 0
SFTPA1:10:79614651-79615083:1 0.0000 5 1 6.8267 1
SERPINF1:17:1777143-1777565:1 684.3960 5 0 2.5297 0
LY6E:8:143021974-143022457:1 1096.5048 5 0 1.1597 0
EEF1A1:6:73517465-73518061:-1 4285.0858 5 0 1.0481 0
KRT7:12:52245547-52248969:1 1489.0571 5 0 1.4510 0
SFTPD:10:79937695-79938189:-1 1843.9981 5 0 4.4872 0
KRT7:12:52248177-52248947:1 0.0000 5 1 1.4554 1
MSLN:16:768340-768865:1 1566.4233 5 0 1.8803 0
CEACAM6:19:41771435-41772211:1 470.5990 5 0 1.9347 0
XAGE1A:X:52497275-52500812:1 0.0000 5 1 2.5085 1
TSPAN8:12:71125085-71125370:-1 1591.7143 5 0 2.4725 0
HLA-DRB1:6:32578633-32581697:-1 6005.2020 5 0 2.4420 0
HLA-DRB1:6:32578668-32580959:-1 2930.9694 5 0 2.4491 0
CTSH:15:78921723-78922198:-1 1665.2678 5 0 2.1827 0
CD55:1:207331092-207359974:1 1963.3329 5 0 1.3544 0
HLA-DRB1:6:32578773-32580252:-1 3115.5636 5 0 2.4654 0
SFTA2:6:30931353-30932036:-1 0.0000 5 1 1.6019 1
SFTA2:6:30931353-30931790:-1 1372.4962 5 0 1.6014 0
VMO1:17:4785285-4785639:-1 284.0572 5 0 2.6579 0
de_genes_pseudo_ept <- t(as.data.frame(str_split(rownames(ATres_ept),":")))[,1]
go_res_pseudo_ept <- enrichGO(de_genes_pseudo_ept, OrgDb = "org.Hs.eg.db", keyType = "SYMBOL" , ont = "BP")
barplot(go_res_pseudo_ept , showCategory = 20)

SUBCLUSTER ANALYSIS (Epithelial T Cells (Tumor - Metastasis))

peak_counts_matrix <- counts(peaks_sce)
colnames(peak_counts_matrix) <- as.character(peaks_sce$CellIdent)
ept2_sce_peak_counts <- counts(peaks_sce)[,colnames(peak_counts_matrix) %like% 'EPITHELIAL_Met|EPITHELIAL_Tumor']
ept2_sce_peak_counts <- ept2_sce_peak_counts[rowSums(ept2_sce_peak_counts) > 7,]

ept2_sce_peak <- SingleCellExperiment(assays = list(counts = ept2_sce_peak_counts))
ept2_sce_peak$cell_labels <- as.character(peaks_sce$CellIdent)[colnames(peak_counts_matrix) %like% 'EPITHELIAL_Met|EPITHELIAL_Tumor']

clusters <- quickCluster(ept2_sce_peak , method = "igraph")
ept2_sce_peak <- computeSumFactors(ept2_sce_peak , cluster=clusters)
ept2_sce_peak$clusters <- clusters
ept2_sce_peak <- logNormCounts(ept2_sce_peak)
dec <- modelGeneVarByPoisson(ept2_sce_peak)
top <- getTopHVGs(dec , n = 4000)
ept2_sce_peak <- denoisePCA(ept2_sce_peak, subset.row=top, technical=dec)
ept2_sce_peak <- runTSNE(ept2_sce_peak)
ept2_sce_peak <- runUMAP(ept2_sce_peak)
plotPCA(ept2_sce_peak , colour_by = "cell_labels")

plotUMAP(ept2_sce_peak , colour_by = "cell_labels")

plotTSNE(ept2_sce_peak , colour_by = "cell_labels")

library(slingshot)
library(RColorBrewer)
ept2_sce_peak <- slingshot(ept2_sce_peak, reducedDim='PCA' , clusterLabels = as.character(ept2_sce_peak$cell_labels) , start.clus = "EPITHELIAL_Tumor" , end.clus = "EPITHELIAL_Met")

embedded <- embedCurves(ept2_sce_peak, "UMAP")
embedded <- slingCurves(embedded)[[1]]
embedded <- data.frame(embedded$s[embedded$ord,])
plotUMAP(ept2_sce_peak, colour_by="slingPseudotime_1") +
  geom_path(data=embedded, aes(x=Dim.1, y=Dim.2), linewidth=1)

library(tradeSeq)

# fit negative binomial GAM
sce <- fitGAM(counts = as.matrix(counts(ept2_sce_peak)[top,]), sds = as.SlingshotDataSet(ept2_sce_peak))
colnames(sce) <- ept2_sce_peak$cell_labels
# test for dynamic expression
ATres_ept2 <- associationTest(sce)
ATres_ept2 <- ATres_ept2[!(is.na(ATres_ept2$pvalue)),]
ATres_ept2$padj <- p.adjust(ATres_ept2$pvalue)
ATres_ept2 <- ATres_ept2[ATres_ept2$meanLogFC > 1,]
topgenes <- rownames(ATres_ept2[order(ATres_ept2$pvalue), ])[1:100]
pst.ord <- order(ept2_sce_peak$slingPseudotime_1, na.last = NA)
heatdata <- assays(ept2_sce_peak)$counts[topgenes, pst.ord]
heatclus <- ept2_sce_peak$cell_labels[pst.ord]
colnames(heatdata) <- ept2_sce_peak$cell_labels
heatdata <- heatdata[ , order(colnames(heatdata))]
heatmap(log1p(as.matrix(heatdata)), Colv = NA, Rowv = NA, ColSideColors = brewer.pal(9,"Set3")[heatclus] , scale = "none")

kable(ATres_ept2[1:50,], digits = 4, "simple" , n = 100)
waldStat df pvalue meanLogFC padj
PGC:6:41736711-41737766:-1 1277.5491 5 0 2.2027 0
PGC:6:41736711-41737000:-1 1277.8313 5 0 2.1955 0
SCGB3A1:5:180590105-180590774:-1 1586.5862 5 0 1.9502 0
SFTPB:2:85658805-85659312:-1 1975.7574 5 0 1.9079 0
SPINK1:5:147824572-147831671:-1 721.9681 5 0 3.5798 0
SPINK4:9:33240166-33248567:1 945.3896 5 0 3.6358 0
S100P:4:6693898-6697170:1 3200.5643 5 0 2.2581 0
NAPSA:19:50358477-50359061:-1 1465.7034 5 0 2.0227 0
S100P:4:6696824-6697170:1 3016.9293 5 0 2.2227 0
SFTPB:2:85660165-85660981:-1 1326.8796 5 0 2.6839 0
SFTPB:2:85660315-85660717:-1 1333.7550 5 0 2.6380 0
SLPI:20:45252239-45253580:-1 768.9916 5 0 2.6550 0
FTL:19:48966284-48966879:1 2193.1718 5 0 1.3198 0
SFTPB:2:85658150-85658822:-1 1410.8368 5 0 1.8659 0
CD74:5:150401637-150401973:-1 1818.1578 5 0 2.1150 0
SOD3:4:24800362-24800842:1 1445.2993 5 0 2.6268 0
AGR2:7:16792612-16793014:-1 1147.8904 5 0 3.3719 0
IFI27:14:94114852-94116695:1 858.0862 5 0 2.6317 0
PPDPF:20:63521481-63522206:1 3467.5106 5 0 2.0845 0
TFF3:21:42312039-42315373:-1 198.8671 5 0 3.5735 0
TFF3:21:42312033-42313694:-1 208.0968 5 0 3.5863 0
SERPINF1:17:1777143-1777565:1 1206.0608 5 0 2.7086 0
RPL39:X:119786504-119790000:-1 2740.1048 5 0 2.4129 0
S100A6:1:153534599-153535280:-1 112.7421 5 0 2.9294 0
TMSB4X:X:12976812-12977227:1 1778.6640 5 0 2.4642 0
SFTPB:2:85657606-85658145:-1 1379.0654 5 0 1.7847 0
ATP5F1E:20:59028521-59032345:-1 3991.9835 5 0 1.6707 0
HLA-B:6:31353872-31354199:-1 1870.4677 5 0 2.4973 0
CLU:8:27597946-27599793:-1 561.5024 5 0 1.4735 0
CLU:8:27597929-27598644:-1 415.6819 5 0 1.4756 0
DHFR:5:80651395-80651616:-1 918.2876 5 0 1.2883 0
HLA-DRA:6:32443855-32445046:1 2034.5723 5 0 1.0603 0
B2M:15:44717694-44718214:1 3001.6915 5 0 2.8707 0
HLA-DRA:6:32444621-32445046:1 1985.7111 5 0 1.0573 0
RPS6:9:19376184-19378727:-1 3488.1565 5 0 1.6802 0
RPL41:12:56116625-56117848:1 16726.7135 5 0 2.1018 0
RPL10:X:154399881-154400938:1 4242.5284 5 0 1.6979 0
XAGE1A:X:52495834-52500807:1 1282.7822 5 0 1.8259 0
NPC2:14:74479885-74484466:-1 1895.6520 5 0 1.6551 0
CYB5A:18:74253272-74253668:-1 1811.5637 5 0 1.2801 0
NPC2:14:74479896-74480794:-1 1891.2206 5 0 1.6427 0
RPS2:16:1962058-1962611:-1 2548.6678 5 0 1.8344 0
RPL23A:17:28720787-28723972:1 2829.7605 5 0 2.4118 0
MT-CO1:MT:5904-7108:1 1094.8221 5 0 1.0076 0
TSPAN8:12:71125085-71125370:-1 1752.7212 5 0 3.0847 0
RPS27:1:153990762-153992155:1 5961.1191 5 0 2.9179 0
CEACAM6:19:41771435-41772211:1 277.5793 5 0 1.7011 0
TPT1:13:45337122-45340004:-1 3750.8162 5 0 2.7154 0
SCGB1A1:11:62419063-62423195:1 502.1654 5 0 3.5902 0
VMO1:17:4785285-4785639:-1 809.3609 5 0 1.4955 0
de_genes_pseudo_ept2 <- t(as.data.frame(str_split(rownames(ATres_ept2),":")))[,1]
go_res_pseudo_ept2 <- enrichGO(de_genes_pseudo_ept2, OrgDb = "org.Hs.eg.db", keyType = "SYMBOL" , ont = "BP")
barplot(go_res_pseudo_ept2 , showCategory = 20)

Extra Regional Plots

Memory T Cells

rps27a_regions <- rownames(tm_sce_peak)[grep("RPS27A" , rownames(tm_sce_peak))]

gridExtra::grid.arrange(
  plotUMAP(tm_sce_peak , colour_by = rps27a_regions[1] ,  shape_by = "cell_labels" , point_size = 2),
  plotUMAP(tm_sce_peak , colour_by = rps27a_regions[2], shape_by = "cell_labels" , point_size = 2),
  plotUMAP(tm_sce_peak , colour_by = rps27a_regions[3], shape_by = "cell_labels" , point_size = 2),
  ncol=1
)

coq10b_regions <- rownames(tm_sce_peak)[grep("COQ10B" , rownames(tm_sce_peak))]

gridExtra::grid.arrange(
  plotUMAP(tm_sce_peak , colour_by = coq10b_regions[5] ,  shape_by = "cell_labels" , point_size = 2),
  plotUMAP(tm_sce_peak , colour_by = coq10b_regions[2], shape_by = "cell_labels" , point_size = 2),
  plotUMAP(tm_sce_peak , colour_by = coq10b_regions[3], shape_by = "cell_labels" , point_size = 2),
  plotUMAP(tm_sce_peak , colour_by = coq10b_regions[4], shape_by = "cell_labels" , point_size = 2),
  ncol=1
)

krr1_regions <- rownames(tm_sce_peak)[grep("KRR1" , rownames(tm_sce_peak))]

gridExtra::grid.arrange(
  plotUMAP(tm_sce_peak , colour_by = krr1_regions[1] ,  shape_by = "cell_labels" , point_size = 2),
  plotUMAP(tm_sce_peak , colour_by = krr1_regions[2], shape_by = "cell_labels" , point_size = 2),
  plotUMAP(tm_sce_peak , colour_by = krr1_regions[3], shape_by = "cell_labels" , point_size = 2),
  plotUMAP(tm_sce_peak , colour_by = krr1_regions[4], shape_by = "cell_labels" , point_size = 2),
  plotUMAP(tm_sce_peak , colour_by = krr1_regions[5], shape_by = "cell_labels" , point_size = 2),
  plotUMAP(tm_sce_peak , colour_by = krr1_regions[6], shape_by = "cell_labels" , point_size = 2),
  plotUMAP(tm_sce_peak , colour_by = krr1_regions[7], shape_by = "cell_labels" , point_size = 2),
  plotUMAP(tm_sce_peak , colour_by = krr1_regions[8], shape_by = "cell_labels" , point_size = 2),
  ncol=2
)

ctla4_regions <- rownames(tm_sce_peak)[grep("CTLA4" , rownames(tm_sce_peak))]

gridExtra::grid.arrange(
  plotUMAP(tm_sce_peak , colour_by = ctla4_regions[1] ,  shape_by = "cell_labels"),
  plotUMAP(tm_sce_peak , colour_by = ctla4_regions[2], shape_by = "cell_labels"),
  plotUMAP(tm_sce_peak , colour_by = ctla4_regions[3], shape_by = "cell_labels"),
  ncol=1
)

Epithelial Cells

akr1c3_regions <- rownames(ept2_sce_peak)[grep("AKR1C3" , rownames(ept2_sce_peak))]

gridExtra::grid.arrange(
  plotUMAP(ept2_sce_peak , colour_by = "cell_labels"),
  plotUMAP(ept2_sce_peak , colour_by = akr1c3_regions[1] ,  shape_by = "cell_labels" , point_size = 2),
  plotUMAP(ept2_sce_peak , colour_by = akr1c3_regions[2], shape_by = "cell_labels" , point_size = 2),
  ncol=1
)

sat1_regions <- rownames(ept_sce_peak)[grep("SAT1" , rownames(ept_sce_peak))]

gridExtra::grid.arrange(
  plotUMAP(ept2_sce_peak , colour_by = "cell_labels"),
  plotUMAP(ept2_sce_peak , colour_by = sat1_regions[1] , point_size = 2),
  plotUMAP(ept2_sce_peak , colour_by = sat1_regions[2] , point_size = 2),
  ncol=1
)

fosb_regions <- rownames(ept2_sce_peak)[grep("FOSB" , rownames(ept2_sce_peak))]

gridExtra::grid.arrange(
  plotUMAP(ept2_sce_peak , colour_by = "cell_labels"),
  plotUMAP(ept2_sce_peak , colour_by = fosb_regions[1] , point_size = 2),
  plotUMAP(ept2_sce_peak , colour_by = fosb_regions[2] , point_size = 2),
  plotUMAP(ept2_sce_peak , colour_by = fosb_regions[3] , point_size = 2),
  plotUMAP(ept2_sce_peak , colour_by = fosb_regions[5] , point_size = 2),
  plotUMAP(ept2_sce_peak , colour_by = fosb_regions[6] , point_size = 2),
  ncol=2
)

aco2_regions <- rownames(ept2_sce_peak)[grep("ACO2" , rownames(ept2_sce_peak))]

gridExtra::grid.arrange(
  plotUMAP(ept2_sce_peak , colour_by = "cell_labels"),
  plotUMAP(ept2_sce_peak , colour_by = aco2_regions[1] , point_size = 2),
  plotUMAP(ept2_sce_peak , colour_by = aco2_regions[2] , point_size = 2),
  plotUMAP(ept2_sce_peak , colour_by = aco2_regions[3] , point_size = 2),
  plotUMAP(ept2_sce_peak , colour_by = aco2_regions[4] , point_size = 2),
  plotUMAP(ept2_sce_peak , colour_by = aco2_regions[5] , point_size = 2),
  plotUMAP(ept2_sce_peak , colour_by = aco2_regions[6] , point_size = 2),
  plotUMAP(ept2_sce_peak , colour_by = aco2_regions[7] , point_size = 2),
  ncol=2
)